#!/usr/bin/pypy
# -*- coding: utf-8 -*-
from __future__ import division
import sys, argparse
import csv
import time as t
import datetime as d
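'''
Compute the day on which each user joined a course: for every enrollment,
take the earliest date on which it appears in the log and express it as a
number of days counted from earliest_date.
'''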

# Reference day for the output: the value written for each enrollment is the
# number of days between this date and the enrollment's first logged day.
# NOTE(review): presumably the first day covered by the log data -- confirm.
earliest_date = d.datetime(2013, 10, 27)

def parse_args():
    """Parse the command line and return it as a plain dict.

    Two positional arguments are expected, in this order: 'log' and
    'output'.  The returned dict has exactly those two keys.

    When the script is started without any argument, '-h' is appended to
    sys.argv so that argparse prints the usage text and exits, rather than
    complaining about missing positionals.
    """
    started_without_arguments = len(sys.argv) == 1
    if started_without_arguments:
        sys.argv.append('-h')

    cli = argparse.ArgumentParser()
    for positional in ('log', 'output'):
        cli.add_argument(positional)

    namespace = cli.parse_args()
    return vars(namespace)

args = parse_args()
log, output = args['log'], args['output']

# Layout of the timestamp column in the log, e.g. "2014-06-14T09:38:29".
time_format = "%Y-%m-%dT%H:%M:%S"

# Maps enrollment_id -> earliest calendar day (a datetime at midnight) on
# which that enrollment appears in the log.
# NOTE: despite its name this holds the *earliest* date, not the latest; the
# name is kept because code further down the file reads it.
user_latest_time = {}

# The `with` block closes the log as soon as it has been read instead of
# leaving that to the garbage collector.
with open(log) as log_file:
    reader = csv.reader(log_file)

    # The first row is the column header; skip it.  The default argument
    # makes this a no-op for a completely empty file.
    next(reader, None)

    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line (for example
            # a trailing newline at the end of the file); nothing to record.
            continue

        # Every data row must have exactly these seven columns; a row with
        # a different column count raises ValueError here.
        enrollment_id, username, course_id, time, source, event, _object = row

        # Only the calendar day matters, so the time of day is dropped.
        _time = t.strptime(time, time_format)
        date = d.datetime(_time.tm_year, _time.tm_mon, _time.tm_mday)

        # Events are grouped per enrollment; a "<username>-<course_id>"
        # string would be an alternative key.
        key = enrollment_id

        # Keep the smallest date seen so far for this enrollment.
        earliest_seen = user_latest_time.get(key)
        if earliest_seen is None or date < earliest_seen:
            user_latest_time[key] = date


# Write one row per enrollment: its id and the number of whole days between
# `earliest_date` and the first day on which the enrollment shows up in the
# log.  Rows come out in the dict's iteration order.
#
# The `with` block flushes and closes the output file deterministically.
# Without it the buffered rows reach the disk only when the file object is
# finalized, which is not a reliable point on interpreters without
# reference counting such as PyPy.
with open(output, 'w') as output_file:
    writer = csv.writer(output_file)
    writer.writerow(["enrollment_id", "user_enter_course_day"])

    for key, date in user_latest_time.items():
        n_days = (date - earliest_date).days
        writer.writerow([key, n_days])
